home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Tech Arsenal 1
/
Tech Arsenal (Arsenal Computer).ISO
/
tek-04
/
bipl.zip
/
PROGS.ZIP
/
IDXTEXT.ICN
< prev
next >
Wrap
Text File
|
1992-09-28
|
4KB
|
128 lines
############################################################################
#
# File: idxtext.icn
#
# Subject: Program for creating indexed text-base
#
# Author: Richard L. Goerwitz
#
# Date: July 9, 1991
#
###########################################################################
#
# Version: 1.12
#
###########################################################################
#
# idxtext turns a file associated with the gettext() routine into an
# indexed text-base. Though gettext() will work fine with files
# that haven't been indexed via idxtext(), access is faster once a
# file has been indexed, provided the file is, say, over 10k (on my
# system the crossover point is actually about 5k).
#
# Usage is simply "idxtext [-a] file1 [file2 [...]]," where file1,
# file2, etc are the names of gettext-format files that are to be
# (re-)indexed. The -a flag tells idxtext to abort if an index file
# already exists.
#
# Index files have a very simple format: keyname tab offset
# [tab offset [etc.]]\n. The first line of the index file is a
# pointer to the last indexed byte of the text-base file it indexes.
#
# BUGS: Index files are too large. Also, I've yet to find a portable
# way of creating unique index names that are capable of being
# uniquely identified with their original text file. It might be
# sensible to hard code the name into the index. The chances of a
# conflict seem remote enough that I haven't bothered. If you're
# worried, use the -a flag.
#
############################################################################
#
# Links: adjuncts
#
# Requires: UNIX or MS-DOS
#
# See also: gettext.icn
#
############################################################################
# declared in adjuncts.icn
# global _slash, _baselen
#
# main -- command-line driver. For every gettext-format file named in
# the argument list, (re)creates the corresponding index file.
#
# a: list of command-line arguments, "[-a] file1 [file2 [...]]".
# If the first argument is -a, the program stops instead of
# overwriting an index file that already exists.
#
# Input files that cannot be opened are reported on &errout and
# skipped; failure to create an index file is fatal.
#
# NOTE: _slash, _baselen, Pathname and getidxname must NOT be declared
# local in this procedure. _slash and _baselen are the globals declared
# in adjuncts.icn; a local declaration would shadow them, so the
# settings made in the initial clause would never be seen outside
# main. Pathname() and getidxname() are global procedures (presumably
# supplied by adjuncts.icn, which the header lists under "Links" --
# confirm); a local declaration would turn each name into a
# null-valued variable, and calling it would be a run-time error.
#
procedure main(a)

    local ABORT, idxfile_name, fname, infile, outfile

    # One-time, OS-dependent setup of the path separator and the
    # base-name length stored in the shared globals.
    initial {
        if find("UNIX"|"Amiga", &features) then {
            _slash := "/"
            _baselen := 10
        }
        else if find("MS-DOS", &features) then {
            _slash := "\\"
            _baselen := 8
        }
        else stop("idxtext: OS not supported")
    }

    # An optional leading -a is removed from the argument list and
    # remembered as a non-null ABORT.
    if \a[1] == "-a" then ABORT := pop(a)

    # Check to see if we have any arguments.
    *a = 0 & stop("usage: idxtext [-a] file1 [file2 [...]]")

    # Start popping filenames off of the argument list.
    while fname := pop(a) do {

        # Open input file; an unreadable file is skipped, not fatal.
        infile := open(fname) |
            { write(&errout, "idxtext: ",fname," not found"); next }

        # Get index file name.
        idxfile_name := Pathname(fname) || getidxname(fname)

        # With -a, being able to open the index file for reading
        # means it already exists, so refuse to replace it.
        if \ABORT then if close(open(idxfile_name)) then
            stop("idxtext: index file ",idxfile_name, " already exists")

        outfile := open(idxfile_name, "w") |
            stop("idxtext: can't open ", idxfile_name)

        # Write index to index.IDX file.
        write_index(infile, outfile)

        every close(infile | outfile)
    }

end
#
# write_index -- scan the text-base file "in" and write its index to
# "out".
#
# in:  file open for reading; a line that begins with "::" introduces
#      a key (the rest of that line, with trailing blanks trimmed).
# out: file open for writing; receives the index.
#
# Output: the first line is the value of where(in) once reading has
# stopped; it is followed by one "key<tab>offset" line per key, in
# sorted key order. The offset is the value of where(in) taken just
# before the key's "::" line was read.
#
# Stops the program if the same key occurs twice.
#
procedure write_index(in, out)

    local offsets, start, text, key, entry

    offsets := table()

    # Record the file position before each read, so that "start" is
    # the offset of the line now held in "text".
    while start := where(in) & text := read(in) do {

        # Only lines that begin with "::" carry a key.
        if key := trim(text ? (="::" & tab(0))) then {

            # Stored values are never null, so a non-null entry means
            # this key has been seen before.
            if \offsets[key] then
                stop("idxtext: duplicate key, ",key)

            offsets[key] := key || "\t" || start
        }
    }

    # The first line of the index holds the position at which
    # indexing stopped, so that anything in the text-base beyond
    # that point can still be searched unindexed.
    write(out, where(in))

    # sort() yields [key, value] pairs ordered by key; the value is
    # the ready-made "key<tab>offset" line. An empty table yields no
    # pairs, so nothing further is written in that case.
    every entry := !sort(offsets, 1) do
        write(out, entry[2])

    return

end